#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#define MAX_PROGRAM_LENGTH 1000

// One row of the mnemonic lookup table: the mnemonic text, its opcode value and
// the operand count recorded for it.
typedef struct {
    char instruction[8];  // mnemonic as a C string; 8 bytes so the longest name, "FENCE.I" (7 characters), keeps its NUL terminator
    unsigned int opcode;  // opcode value reported for this mnemonic by get_opcode()
	unsigned int length;  // operand count recorded in the table (e.g. 3 for ADDI, 2 for LUI, 0 for ECALL)
} RV32I_Instruction;

// Instructions supported: the RV32I base set and the M extension (multiply/divide), plus FENCE.I and the CSR instructions.

// Lookup table mapping each supported mnemonic to its opcode value and operand count.
// Rows are grouped by opcode; lookups (see get_opcode) compare names with strcmp, so
// mnemonics are case-sensitive and must be written in upper case.
// NOTE(review): "FENCE.I" is 7 characters long, so the name field of RV32I_Instruction
// must be at least 8 bytes for that entry to be a NUL-terminated string.
RV32I_Instruction instruction_set[] = {
    // OP-IMM (0x13): register-immediate ALU operations, including the immediate shifts
    {"ADDI", 0x13, 3}, {"SLTI", 0x13, 3}, {"SLTIU", 0x13, 3}, {"XORI", 0x13, 3}, {"ORI", 0x13, 3},
    {"ANDI", 0x13, 3}, {"SLLI", 0x13, 3}, {"SRLI", 0x13, 3}, {"SRAI", 0x13, 3},
    // LUI (0x37) and AUIPC (0x17)
    {"LUI", 0x37, 2}, {"AUIPC", 0x17, 2},
    // LOAD (0x03)
    {"LW", 0x03, 3}, {"LH", 0x03, 3}, {"LB", 0x03, 3}, {"LHU", 0x03, 3}, {"LBU", 0x03, 3},
    // STORE (0x23)
    {"SW", 0x23, 3}, {"SB", 0x23, 3}, {"SH", 0x23, 3},
    // OP (0x33): register-register ALU operations and the M extension
    {"ADD", 0x33, 3}, {"SUB", 0x33, 3}, {"SLL", 0x33, 3}, {"SLT", 0x33, 3}, {"SLTU", 0x33, 3},
    {"XOR", 0x33, 3}, {"SRL", 0x33, 3}, {"SRA", 0x33, 3}, {"OR", 0x33, 3}, {"AND", 0x33, 3},
    {"MUL", 0x33, 3}, {"MULH", 0x33, 3}, {"MULHSU", 0x33, 3}, {"MULHU", 0x33, 3},
    {"DIV", 0x33, 3}, {"DIVU", 0x33, 3}, {"REM", 0x33, 3}, {"REMU", 0x33, 3},
    // BRANCH (0x63)
    {"BEQ", 0x63, 3}, {"BNE", 0x63, 3}, {"BLT", 0x63, 3},
    {"BGE", 0x63, 3}, {"BLTU", 0x63, 3}, {"BGEU", 0x63, 3},
    // JAL (0x6f) and JALR (0x67); JALR takes rd, rs1 and an offset, hence three operands
    {"JAL", 0x6f, 2}, {"JALR", 0x67, 3},
    // MISC-MEM (0x0F)
    {"FENCE", 0x0F, 2}, {"FENCE.I", 0x0F, 0},
    // SYSTEM (0x73)
    {"ECALL", 0x73, 0}, {"EBREAK", 0x73, 0},
    {"CSRRW", 0x73, 3}, {"CSRRS", 0x73, 3}, {"CSRRC", 0x73, 3},
    {"CSRRWI", 0x73, 3}, {"CSRRSI", 0x73, 3}, {"CSRRCI", 0x73, 3}};

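// Assembly syntax notes:
// Mnemonics are looked up case-sensitively and must be written in upper case, as in the table above.
// Registers are named xi (x0, x1, x2, ...); reghex() also accepts the ABI names (zero, ra, sp, ...).
// lw, lh, lb, sw, sh, sb use the three-operand format
//     lw rd, rs1, offset             and not             lw rs2, offset(rs1)
// beq, bne, blt, bge, bltu and bgeu use the format
//     beq rs1, rs2, offset           (in 2 bytes)
// Write the FENCE operands as hex "iorw" bit masks:
//     FENCE pred_iorw succ_iorw
//     FENCE 0xF 0x1                  is                  FENCE iorw, w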

// Translates a register name into its 5-bit register number.
//
// Accepts the architectural names x0..x31 and the ABI names (zero, ra, sp, gp,
// tp, t0-t6, s0-s11, a0-a7, plus "fp" as an alias of s0/x8). Matching is exact
// and case-sensitive.
//
// Returns the register number (0..31). Unknown names and a NULL pointer yield
// 0x00, which is indistinguishable from x0; callers that need to reject bad
// names must check the spelling themselves.
int reghex (const char *reg) {
	// Row index == register number; column 0 is the xN name, column 1 the ABI name.
	static const char *const reg_names[32][2] = {
		{"x0",  "zero"}, {"x1",  "ra"},  {"x2",  "sp"},  {"x3",  "gp"},
		{"x4",  "tp"},   {"x5",  "t0"},  {"x6",  "t1"},  {"x7",  "t2"},
		{"x8",  "s0"},   {"x9",  "s1"},  {"x10", "a0"},  {"x11", "a1"},
		{"x12", "a2"},   {"x13", "a3"},  {"x14", "a4"},  {"x15", "a5"},
		{"x16", "a6"},   {"x17", "a7"},  {"x18", "s2"},  {"x19", "s3"},
		{"x20", "s4"},   {"x21", "s5"},  {"x22", "s6"},  {"x23", "s7"},
		{"x24", "s8"},   {"x25", "s9"},  {"x26", "s10"}, {"x27", "s11"},
		{"x28", "t3"},   {"x29", "t4"},  {"x30", "t5"},  {"x31", "t6"},
	};

	if (reg == NULL) {
		return 0x00;
	}

	for (int number = 0; number < 32; number++) {
		if (strcmp(reg, reg_names[number][0]) == 0 || strcmp(reg, reg_names[number][1]) == 0) {
			return number;	// x0/zero is register 0 (was wrongly reported as 1)
		}
	}

	// "fp" is a second ABI alias for x8 and has no slot in the two-column table.
	if (strcmp(reg, "fp") == 0) {
		return 0x08;
	}

	return 0x00;
}

unsigned int get_opcode(const char* instruction) {
    for (int i = 0; i < sizeof(instruction_set) / sizeof(instruction_set[0]); i++) {
        if (strcmp(instruction, instruction_set[i].instruction) == 0) {
            return instruction_set[i].opcode;
        }
    }

    printf("Instruction not found.\n");
    return 0xFFFFFFFF;
}

// Textual pieces of one instruction, held in four fixed-size character fields.
// Each field is 5 bytes, i.e. at most 4 characters plus the terminating NUL
// when used as a C string.
// NOTE(review): the field names suggest mnemonic, two register operands and an
// immediate; confirm against the code that fills this struct.
typedef struct {
	char op[5];		// presumably the mnemonic text
	char reg1[5];	// presumably the first register operand
	char reg2[5];	// presumably the second register operand
	char imm[5];	// presumably the immediate operand text
} instr;

// Writes num_instr 32-bit instruction words to the file "final" in the current
// directory, replacing any existing file.
//
// instr      array of at least num_instr words; may be NULL only when num_instr is 0
// num_instr  number of words to write; must not be negative
//
// Words are written exactly as they are laid out in memory (4 bytes each, host
// byte order). Returns 0 on success and 1 on failure: invalid arguments, the file
// cannot be opened, fewer words than requested are written, or closing fails.
int write_bin(uint32_t *instr, int num_instr) {
    // A negative count would turn into a huge size_t in fwrite; reject it up front,
    // before the output file is touched.
    if (num_instr < 0 || (instr == NULL && num_instr > 0)) {
        fprintf(stderr, "write_bin: invalid instruction buffer or count\n");
        return 1;
    }

    // Open the file "final" in binary write mode
    FILE *file = fopen("final", "wb");
    if (file == NULL) {
        perror("Error opening file");
        return 1;
    }

    const size_t wanted = (size_t)num_instr;
    size_t written = 0;
    if (wanted > 0) {
        written = fwrite(instr, sizeof(uint32_t), wanted, file);
    }

    const int short_write = (written != wanted);
    if (short_write) {
        perror("Error writing file");
    }

    // fclose flushes buffered data, so its failure also means lost output.
    if (fclose(file) != 0) {
        if (!short_write) {
            perror("Error closing file");
        }
        return 1;
    }
    if (short_write) {
        return 1;
    }

    printf("Successfully wrote %d instructions (4 bytes each) to the file 'final'.\n", num_instr);

    return 0;
}

// Tokenizer limits: one mnemonic plus at most three operands per line.
enum { RV_MAX_TOKENS = 4, RV_TOKEN_LEN = 16 };

// Operand layouts of the supported mnemonics, in assembly operand order.
enum rv_format {
	RV_FMT_R,		// OP rd, rs1, rs2
	RV_FMT_I,		// OP rd, rs1, imm		(ALU immediates, loads, JALR)
	RV_FMT_SHIFT,	// OP rd, rs1, shamt
	RV_FMT_S,		// OP rs2, rs1, offset
	RV_FMT_B,		// OP rs1, rs2, offset
	RV_FMT_U,		// OP rd, imm
	RV_FMT_J,		// OP rd, offset
	RV_FMT_FENCE,	// FENCE pred, succ
	RV_FMT_CSR,		// OP rd, csr, rs1
	RV_FMT_CSRI,	// OP rd, csr, uimm
	RV_FMT_NONE		// no operands
};

// Encoding recipe for one mnemonic. The 7-bit opcode itself comes from get_opcode().
struct rv_spec {
	const char *name;		// upper-case mnemonic
	enum rv_format format;	// operand layout
	uint32_t fixed;			// constant bits other than the opcode (funct3, funct7, ...)
};

#define RV_FUNCT(funct7, funct3) (((uint32_t)(funct7) << 25) | ((uint32_t)(funct3) << 12))

static const struct rv_spec rv_specs[] = {
	{"ADDI",   RV_FMT_I,     RV_FUNCT(0x00, 0)}, {"SLTI",   RV_FMT_I,     RV_FUNCT(0x00, 2)},
	{"SLTIU",  RV_FMT_I,     RV_FUNCT(0x00, 3)}, {"XORI",   RV_FMT_I,     RV_FUNCT(0x00, 4)},
	{"ORI",    RV_FMT_I,     RV_FUNCT(0x00, 6)}, {"ANDI",   RV_FMT_I,     RV_FUNCT(0x00, 7)},
	{"SLLI",   RV_FMT_SHIFT, RV_FUNCT(0x00, 1)}, {"SRLI",   RV_FMT_SHIFT, RV_FUNCT(0x00, 5)},
	{"SRAI",   RV_FMT_SHIFT, RV_FUNCT(0x20, 5)},
	{"LB",     RV_FMT_I,     RV_FUNCT(0x00, 0)}, {"LH",     RV_FMT_I,     RV_FUNCT(0x00, 1)},
	{"LW",     RV_FMT_I,     RV_FUNCT(0x00, 2)}, {"LBU",    RV_FMT_I,     RV_FUNCT(0x00, 4)},
	{"LHU",    RV_FMT_I,     RV_FUNCT(0x00, 5)},
	{"SB",     RV_FMT_S,     RV_FUNCT(0x00, 0)}, {"SH",     RV_FMT_S,     RV_FUNCT(0x00, 1)},
	{"SW",     RV_FMT_S,     RV_FUNCT(0x00, 2)},
	{"ADD",    RV_FMT_R,     RV_FUNCT(0x00, 0)}, {"SUB",    RV_FMT_R,     RV_FUNCT(0x20, 0)},
	{"SLL",    RV_FMT_R,     RV_FUNCT(0x00, 1)}, {"SLT",    RV_FMT_R,     RV_FUNCT(0x00, 2)},
	{"SLTU",   RV_FMT_R,     RV_FUNCT(0x00, 3)}, {"XOR",    RV_FMT_R,     RV_FUNCT(0x00, 4)},
	{"SRL",    RV_FMT_R,     RV_FUNCT(0x00, 5)}, {"SRA",    RV_FMT_R,     RV_FUNCT(0x20, 5)},
	{"OR",     RV_FMT_R,     RV_FUNCT(0x00, 6)}, {"AND",    RV_FMT_R,     RV_FUNCT(0x00, 7)},
	{"MUL",    RV_FMT_R,     RV_FUNCT(0x01, 0)}, {"MULH",   RV_FMT_R,     RV_FUNCT(0x01, 1)},
	{"MULHSU", RV_FMT_R,     RV_FUNCT(0x01, 2)}, {"MULHU",  RV_FMT_R,     RV_FUNCT(0x01, 3)},
	{"DIV",    RV_FMT_R,     RV_FUNCT(0x01, 4)}, {"DIVU",   RV_FMT_R,     RV_FUNCT(0x01, 5)},
	{"REM",    RV_FMT_R,     RV_FUNCT(0x01, 6)}, {"REMU",   RV_FMT_R,     RV_FUNCT(0x01, 7)},
	{"BEQ",    RV_FMT_B,     RV_FUNCT(0x00, 0)}, {"BNE",    RV_FMT_B,     RV_FUNCT(0x00, 1)},
	{"BLT",    RV_FMT_B,     RV_FUNCT(0x00, 4)}, {"BGE",    RV_FMT_B,     RV_FUNCT(0x00, 5)},
	{"BLTU",   RV_FMT_B,     RV_FUNCT(0x00, 6)}, {"BGEU",   RV_FMT_B,     RV_FUNCT(0x00, 7)},
	{"LUI",    RV_FMT_U,     0},                 {"AUIPC",  RV_FMT_U,     0},
	{"JAL",    RV_FMT_J,     0},                 {"JALR",   RV_FMT_I,     RV_FUNCT(0x00, 0)},
	{"FENCE",  RV_FMT_FENCE, 0},                 {"FENCE.I", RV_FMT_NONE, RV_FUNCT(0x00, 1)},
	{"ECALL",  RV_FMT_NONE,  0},                 {"EBREAK", RV_FMT_NONE,  (uint32_t)1 << 20},
	{"CSRRW",  RV_FMT_CSR,   RV_FUNCT(0x00, 1)}, {"CSRRS",  RV_FMT_CSR,   RV_FUNCT(0x00, 2)},
	{"CSRRC",  RV_FMT_CSR,   RV_FUNCT(0x00, 3)}, {"CSRRWI", RV_FMT_CSRI,  RV_FUNCT(0x00, 5)},
	{"CSRRSI", RV_FMT_CSRI,  RV_FUNCT(0x00, 6)}, {"CSRRCI", RV_FMT_CSRI,  RV_FUNCT(0x00, 7)},
};

#undef RV_FUNCT

// Returns the encoding recipe for `mnemonic`, or NULL when there is none.
static const struct rv_spec *find_spec(const char *mnemonic)
{
	for (size_t k = 0; k < sizeof(rv_specs) / sizeof(rv_specs[0]); k++) {
		if (strcmp(mnemonic, rv_specs[k].name) == 0) {
			return &rv_specs[k];
		}
	}
	return NULL;
}

// Number of operands each layout expects after the mnemonic.
static int operands_expected(enum rv_format format)
{
	switch (format) {
	case RV_FMT_NONE:
		return 0;
	case RV_FMT_U:
	case RV_FMT_J:
	case RV_FMT_FENCE:
		return 2;
	default:
		return 3;
	}
}

// Splits `line` in place on spaces, tabs, commas and line endings and copies the
// pieces into `tokens` (unused rows are left empty). Returns the token count, or
// -1 when the line has more than RV_MAX_TOKENS tokens or a token does not fit.
static int split_line(char *line, char tokens[][RV_TOKEN_LEN])
{
	static const char delimiters[] = " ,\t\r\n";
	int count = 0;

	memset(tokens, 0, (size_t)RV_MAX_TOKENS * RV_TOKEN_LEN);
	for (char *piece = strtok(line, delimiters); piece != NULL; piece = strtok(NULL, delimiters)) {
		size_t len = strlen(piece);
		if (count == RV_MAX_TOKENS || len >= RV_TOKEN_LEN) {
			return -1;
		}
		memcpy(tokens[count], piece, len + 1);	// length checked above, NUL included
		count++;
	}
	return count;
}

// Resolves a register name through reghex(). reghex() reports unknown names as 0,
// so a 0 result is accepted only for the literal spellings of register zero.
// Returns 1 and stores the 5-bit number on success, 0 on an unknown name.
static int parse_register(char *name, uint32_t *reg_out)
{
	int number = reghex(name);

	if (number == 0 && strcmp(name, "x0") != 0 && strcmp(name, "zero") != 0) {
		return 0;
	}
	*reg_out = (uint32_t)number & 0x1Fu;
	return 1;
}

// Parses a decimal or 0x-prefixed hexadecimal integer (optional sign) and checks
// it against [lowest, highest]. Returns 1 and stores the value on success; returns
// 0 on empty text, trailing junk or an out-of-range value.
static int parse_number(const char *text, long lowest, long highest, long *value_out)
{
	const char *digits = text;
	char *end = NULL;
	long value;
	int base;

	if (*digits == '+' || *digits == '-') {
		digits++;
	}
	base = (digits[0] == '0' && (digits[1] == 'x' || digits[1] == 'X')) ? 16 : 10;

	value = strtol(text, &end, base);
	if (end == text || *end != '\0' || value < lowest || value > highest) {
		return 0;
	}
	*value_out = value;
	return 1;
}

// Encodes one tokenized instruction. tokens[0] is the mnemonic, tokens[1..3] the
// operands, `operand_count` how many operands were present and `opcode` the value
// get_opcode() returned for the mnemonic.
// Returns NULL and stores the 32-bit word on success, otherwise a static string
// describing why the line could not be encoded.
static const char *encode_instruction(char tokens[][RV_TOKEN_LEN], int operand_count,
                                      uint32_t opcode, uint32_t *word_out)
{
	static const char bad_register[] = "unknown register name";
	static const char bad_number[] = "immediate missing, malformed or out of range";
	const struct rv_spec *spec = find_spec(tokens[0]);
	uint32_t r1 = 0, r2 = 0, r3 = 0;
	long n1 = 0, n2 = 0;
	uint32_t bits;
	uint32_t word;

	if (spec == NULL) {
		return "no encoding known for this mnemonic";
	}
	if (operand_count != operands_expected(spec->format)) {
		return "wrong number of operands";
	}

	word = spec->fixed | (opcode & 0x7Fu);

	switch (spec->format) {
	case RV_FMT_R:		// rd, rs1, rs2
		if (!parse_register(tokens[1], &r1) || !parse_register(tokens[2], &r2) ||
		    !parse_register(tokens[3], &r3)) {
			return bad_register;
		}
		word |= (r3 << 20) | (r2 << 15) | (r1 << 7);
		break;

	case RV_FMT_I:		// rd, rs1, imm[11:0]
		if (!parse_register(tokens[1], &r1) || !parse_register(tokens[2], &r2)) {
			return bad_register;
		}
		// 2048..4095 is accepted as the raw 12-bit field for backward compatibility.
		if (!parse_number(tokens[3], -2048, 4095, &n1)) {
			return bad_number;
		}
		word |= (((uint32_t)n1 & 0xFFFu) << 20) | (r2 << 15) | (r1 << 7);
		break;

	case RV_FMT_SHIFT:	// rd, rs1, shamt[4:0]
		if (!parse_register(tokens[1], &r1) || !parse_register(tokens[2], &r2)) {
			return bad_register;
		}
		if (!parse_number(tokens[3], 0, 31, &n1)) {
			return "shift amount must be in the range 0..31";
		}
		word |= ((uint32_t)n1 << 20) | (r2 << 15) | (r1 << 7);
		break;

	case RV_FMT_S:		// rs2, rs1, offset; imm[11:5] -> bits 31:25, imm[4:0] -> bits 11:7
		if (!parse_register(tokens[1], &r1) || !parse_register(tokens[2], &r2)) {
			return bad_register;
		}
		if (!parse_number(tokens[3], -2048, 4095, &n1)) {
			return bad_number;
		}
		bits = (uint32_t)n1;
		word |= (((bits >> 5) & 0x7Fu) << 25) | (r1 << 20) | (r2 << 15) | ((bits & 0x1Fu) << 7);
		break;

	case RV_FMT_B:		// rs1, rs2, offset; imm[12|10:5] -> bits 31:25, imm[4:1|11] -> bits 11:7
		if (!parse_register(tokens[1], &r1) || !parse_register(tokens[2], &r2)) {
			return bad_register;
		}
		if (!parse_number(tokens[3], -4096, 4094, &n1)) {
			return bad_number;
		}
		if (n1 % 2 != 0) {
			return "branch offset must be a multiple of 2";
		}
		bits = (uint32_t)n1;
		word |= (((bits >> 12) & 0x1u) << 31) | (((bits >> 5) & 0x3Fu) << 25) |
		        (r2 << 20) | (r1 << 15) |
		        (((bits >> 1) & 0xFu) << 8) | (((bits >> 11) & 0x1u) << 7);
		break;

	case RV_FMT_U:		// rd, imm[31:12]
		if (!parse_register(tokens[1], &r1)) {
			return bad_register;
		}
		if (!parse_number(tokens[2], -524288, 1048575, &n1)) {
			return bad_number;
		}
		word |= (((uint32_t)n1 & 0xFFFFFu) << 12) | (r1 << 7);
		break;

	case RV_FMT_J:		// rd, offset; imm[20|10:1|11|19:12] -> bits 31:12
		if (!parse_register(tokens[1], &r1)) {
			return bad_register;
		}
		if (!parse_number(tokens[2], -1048576, 1048574, &n1)) {
			return bad_number;
		}
		if (n1 % 2 != 0) {
			return "jump offset must be a multiple of 2";
		}
		bits = (uint32_t)n1;
		word |= (((bits >> 20) & 0x1u) << 31) | (((bits >> 1) & 0x3FFu) << 21) |
		        (((bits >> 11) & 0x1u) << 20) | (((bits >> 12) & 0xFFu) << 12) | (r1 << 7);
		break;

	case RV_FMT_FENCE:	// pred -> bits 27:24, succ -> bits 23:20
		if (!parse_number(tokens[1], 0, 15, &n1) || !parse_number(tokens[2], 0, 15, &n2)) {
			return "FENCE sets must be iorw bit masks in the range 0x0..0xF";
		}
		word |= ((uint32_t)n1 << 24) | ((uint32_t)n2 << 20);
		break;

	case RV_FMT_CSR:	// rd, csr, rs1
		if (!parse_register(tokens[1], &r1) || !parse_register(tokens[3], &r2)) {
			return bad_register;
		}
		if (!parse_number(tokens[2], 0, 4095, &n1)) {
			return bad_number;
		}
		word |= ((uint32_t)n1 << 20) | (r2 << 15) | (r1 << 7);
		break;

	case RV_FMT_CSRI:	// rd, csr, uimm[4:0] (occupies the rs1 field)
		if (!parse_register(tokens[1], &r1)) {
			return bad_register;
		}
		if (!parse_number(tokens[2], 0, 4095, &n1) || !parse_number(tokens[3], 0, 31, &n2)) {
			return bad_number;
		}
		word |= ((uint32_t)n1 << 20) | ((uint32_t)n2 << 15) | (r1 << 7);
		break;

	case RV_FMT_NONE:	// ECALL, EBREAK, FENCE.I: every bit is fixed
		break;
	}

	*word_out = word;
	return NULL;
}

// Assembles /mnt/dcc/program.s into the binary file "final".
//
// Each input line holds one upper-case mnemonic followed by its operands,
// separated by spaces, tabs and/or commas; blank lines are skipped. Every line is
// tokenized, its opcode looked up with get_opcode(), encoded into one 32-bit word
// and, once the whole file has been read, all words are handed to write_bin().
// Lines that cannot be encoded are reported on stdout and skipped.
//
// Returns 1 when the source file cannot be opened or the output cannot be
// written, 0 otherwise.
int main() {

	FILE *file = fopen("/mnt/dcc/program.s", "r");
	if (file == NULL) {
		perror("Error opening file");
		return 1;
	}

	char line[MAX_PROGRAM_LENGTH];
	uint32_t instruction[MAX_PROGRAM_LENGTH];
	int num_instr = 0;
	int line_number = 0;

	while (fgets(line, sizeof(line), file)) {
		char tokens[RV_MAX_TOKENS][RV_TOKEN_LEN];
		uint32_t word = 0;

		line_number++;

		int token_count = split_line(line, tokens);
		if (token_count == 0) {
			continue;	// blank line
		}
		if (token_count < 0) {
			printf("Line %d: too many operands, or a token longer than %d characters.\n",
			       line_number, RV_TOKEN_LEN - 1);
			continue;
		}

		unsigned int opcode = get_opcode(tokens[0]);
		if (opcode == 0xFFFFFFFF) {
			printf("Faulty program. Instruction not available.");
			continue;
		}

		// The output array has a fixed capacity; stop instead of writing past it.
		if (num_instr >= MAX_PROGRAM_LENGTH) {
			printf("Program exceeds %d instructions; remaining lines ignored.\n", MAX_PROGRAM_LENGTH);
			break;
		}

		const char *problem = encode_instruction(tokens, token_count - 1, opcode, &word);
		if (problem != NULL) {
			printf("Line %d: %s: %s.\n", line_number, tokens[0], problem);
			continue;
		}

		instruction[num_instr] = word;
		num_instr++;
	}

	fclose(file);

	return write_bin(instruction, num_instr);
}